####################################################################################################
# This file creates the main hospital analytic dataset for the ambulance project.
#
# John Graves
# Created 18 May 2016
# 
#
# Government, Non-Federal
# 12	State
# 13	County
# 14	City
# 15	City-county
# 16	Hospital district or authority
# Nongovernment, not-for-profit	
# 21	Church operated
# 23	Other
# Investor-owned (for-profit)	
# 31	Individual
# 32	Partnership
# 33	Corporation
# Government, federal	
# 41	Air Force
# 42	Army
# 43	Navy
# 44	Public Health Service other than 47
# 45	Veterans Affairs
# 46	Federal other than 41-45, 47-48
# 47	Public Health Service Indian Service
# 48	Department of Justice
####################################################################################################

rm(list=ls())
pkg = list("haven","foreign","ggplot2","reshape","reshape2","plyr","ggthemes","stringr","Hmisc","dplyr","tidyr","DataCombine","zoo")
invisible(lapply(pkg, require, character.only = TRUE))
homedir = "~/Dropbox/hospital-measures/analysis/"

# Get First Instance and 3-Year Averages

mm = function(x) mean(x,na.rm=TRUE)
firstnm = function(x) x[!is.na(x)][1]
dummy = function(x) x


# Hospital Must Have At Least 3 Non-Missing Values
c1 = function(x) coalesce(x,lead(x,1))
c2 = function(x) coalesce(x,lead(x,2))
c3 = function(x) coalesce(x,lead(x,3))
c4 = function(x) coalesce(x,lead(x,4))
c5 = function(x) coalesce(x,lead(x,5))
c6 = function(x) coalesce(x,lead(x,6))
c7 = function(x) coalesce(x,lead(x,7))
c8 = function(x) coalesce(x,lead(x,8))
c9 = function(x) coalesce(x,lead(x,9))
c10 = function(x) coalesce(x,lead(x,10))

propmiss <- function(dataframe) {
  m <- sapply(dataframe, function(x) {
    data.frame(
      nmiss=sum(is.na(x)), 
      n=length(x), 
      propmiss=sum(is.na(x))/length(x)
    )
  })
  d <- data.frame(t(m))
  d <- sapply(d, unlist)
  d <- as.data.frame(d)
  d$variable <- row.names(d)
  row.names(d) <- NULL
  d <- cbind(d[ncol(d)],d[-ncol(d)])
  return(d[order(d$propmiss), ])
}

# Get 3-Year Rolling Average
yr3 <- function(x) rollmean(x, 3, na.pad=TRUE, align="right")
yr1 <- function(x) rollmean(x,1,na.pad=TRUE,align="right")
conc <- function(x) lag(x)
nafirst <- function(x) ifelse(is.na(x),firstnm(x),x)
nextval = function(x) ifelse(is.na(x),lead(x),x)
nextval2 = function(x) ifelse(is.na(x),lead(x,n=2),x)
lastval = function(x) ifelse(is.na(x),lag(x),x)
lastval2 = function(x) ifelse(is.na(x),lag(x,n=2),x)
lastval3 = function(x) ifelse(is.na(x),lag(x,n=3),x)
lastval4 = function(x) ifelse(is.na(x),lag(x,n=4),x)
lastval5 = function(x) ifelse(is.na(x),lag(x,n=5),x)
dummy = function(x) x



####
## 
# AHA Measures
##
####
setwd(homedir)
source("./sub-files/hospital-measure-functions.r")
source("./sub-files/aha-raw.r")
low.profit = c("brnhos","aidsshos","emdephos","psycahos","psyemhos","traumhos","alchhos","alcophos")
high.profit = c("adtchos","iclabhos","aclabhos","iclabhos","mrihos","ctscnhos","pethos","petcthos","ultsnhos","nichos","pedichos","fitchos","ortohos","sporthos","eswlhos","dradfhos","specthos","womhchos")
teach = c("mapp3","mapp8")
ids = c("id","Provider.ID","year")
other.aha = c("cntrl","hospbd","serv","admtot","ambhos" , "ambsys" , "ambven","npinum" )

aha.full = rbind.fill(aha2002,aha2003,aha2004,aha2005,aha2006,aha2007,aha2008,aha2009,aha2010,aha2011,aha2012,aha2013,aha2014)

AHA = aha.full[,c(ids,other.aha,low.profit,high.profit,teach,other.aha)]
AHA[is.na(AHA)] = 0
AHA[,low.profit ] = -1*AHA[,low.profit]
AHA$profit.index = apply(AHA[,c(low.profit,high.profit)],1,sum)
AHA$high.profit = apply(AHA[,c(high.profit)],1,sum)
AHA$low.profit = -1*apply(AHA[,c(low.profit)],1,sum)
AHA$coth = as.integer(AHA$mapp8==1) #Member of Council of Teaching Hospital of the Association of American Medical Colleges (COTH)
AHA$teach = as.integer(AHA$mapp3==1)  #Residency training approval by Accreditation Council for Graduate Medical Education
AHA$nonpr = as.integer(AHA$cntrl.1 %in% c(21,23))
AHA$gov = as.integer(AHA$cntrl.1 %in% c(12,13,14,15,16,41,42,43,44,45,46,47,48))
AHA$forpr = as.integer(AHA$cntrl.1 %in% c(31,32,33))
##
# Expand to Include 2000-2016
##
xx.AHA.pos = colnames(AHA)[3:dim(AHA)[2]] %>% as.character()
AHA3yr.Exp = expand.grid(Provider.ID = unique(AHA$Provider.ID),year=2000:2016)
AHA3yr.Full =merge(AHA,AHA3yr.Exp,by=c("Provider.ID","year"),all.y=TRUE) %>% arrange(Provider.ID,year) %>% group_by(Provider.ID) %>%
   mutate_at(vars(xx.AHA.pos),funs(lastval))  %>% mutate_at(vars(xx.AHA.pos),funs(lastval2))   %>% mutate_at(vars(xx.AHA.pos),funs(nafirst)  )

AHA3yr.Full %>% filter(Provider.ID=="440039")

####
## 
# Quality Measures
##
####

xx.ids = c("Provider.ID","year","month")
xx.outcome = c("ABS_MORT_CMP","MORT_30_AMI","MORT_30_PN","MORT_30_HF","ABS_READM_CMP","READM_30_AMI","READM_30_PN","READM_30_HF")
load(file=paste(homedir,"../data/files/Outcome_Measures_Long.Rdata",sep=""))

# Select only the last instance in each year.
Outcome = Outcome.Long[,c(xx.ids,xx.outcome)]; head(Outcome)
Outcome = as.data.frame(aggregate(Outcome, list(Outcome$Provider.ID,Outcome$year), FUN=tail, 1)); head(Outcome)
Outcome = Outcome[,- which(names(Outcome) %in% c("Group.1","Group.2","month"))]

Outcome.Imputed = Outcome %>%  arrange(Provider.ID,year) %>% group_by(Provider.ID) %>% 
  mutate_each_(funs(c1),xx.outcome) %>% 
  mutate_each_(funs(c2),xx.outcome) %>% 
  mutate_each_(funs(c3),xx.outcome) %>% 
  mutate_each_(funs(c4),xx.outcome) %>% 
  mutate_each_(funs(c5),xx.outcome) %>% 
  mutate_each_(funs(c6),xx.outcome) %>% 
  mutate_each_(funs(c7),xx.outcome) %>% 
  mutate_each_(funs(c8),xx.outcome) %>% 
  mutate_each_(funs(c9),xx.outcome) %>% 
  mutate_each_(funs(c10),xx.outcome) %>% arrange(Provider.ID,desc(year)) %>% 
  mutate_each_(funs(c1),xx.outcome) %>% 
  mutate_each_(funs(c2),xx.outcome) %>% 
  mutate_each_(funs(c3),xx.outcome) %>% 
  mutate_each_(funs(c4),xx.outcome) %>% 
  mutate_each_(funs(c5),xx.outcome) %>% 
  mutate_each_(funs(c6),xx.outcome) %>% 
  mutate_each_(funs(c7),xx.outcome) %>% 
  mutate_each_(funs(c8),xx.outcome) %>% 
  mutate_each_(funs(c9),xx.outcome) 

Expanded = expand.grid(Provider.ID = unique(Outcome$Provider.ID),
                       year = 2000:2016) 
Outcome.Expanded = merge(Outcome.Imputed,Expanded,by=c("Provider.ID","year"),all.y=TRUE) %>% 
  arrange(Provider.ID,year) %>% group_by(Provider.ID) %>% 
  mutate_each_(funs(c1),xx.outcome) %>% 
  mutate_each_(funs(c2),xx.outcome) %>% 
  mutate_each_(funs(c3),xx.outcome) %>% 
  mutate_each_(funs(c4),xx.outcome) %>% 
  mutate_each_(funs(c5),xx.outcome) %>% 
  mutate_each_(funs(c6),xx.outcome) %>% 
  mutate_each_(funs(c7),xx.outcome) %>% 
  mutate_each_(funs(c8),xx.outcome) %>% 
  mutate_each_(funs(c9),xx.outcome) %>% 
  mutate_each_(funs(c10),xx.outcome) %>% arrange(Provider.ID,desc(year)) %>% 
  mutate_each_(funs(c1),xx.outcome) %>% 
  mutate_each_(funs(c2),xx.outcome) %>% 
  mutate_each_(funs(c3),xx.outcome) %>% 
  mutate_each_(funs(c4),xx.outcome) %>% 
  mutate_each_(funs(c5),xx.outcome) %>% 
  mutate_each_(funs(c6),xx.outcome) %>% 
  mutate_each_(funs(c7),xx.outcome) %>% 
  mutate_each_(funs(c8),xx.outcome) %>% 
  mutate_each_(funs(c9),xx.outcome) 

Outcome.3yr.Full = Outcome.Expanded %>%  arrange(Provider.ID,year) %>% mutate_each_(funs(yr3,yr1,conc,dummy),xx.outcome) %>% select(-ends_with("dummy"))
Outcome.3yr.Full %>% filter(Provider.ID=="440039") %>% select(year,contains("ABS_MORT_CMP"))

# ####
# # Testing
# yr3 <- function(x) rollmean(x, 3, na.pad=TRUE, align="right")
# Outcome %>% select(Provider.ID,year,ABS_MORT_CMP) %>% 
#    group_by(Provider.ID) %>% arrange(Provider.ID,year) %>% mutate(non.na = sum(!is.na(ABS_MORT_CMP))) %>%
#   filter(non.na>=3) %>% mutate_each(funs(yr3,dummy),3) %>% mutate_each(funs(nafirst),5) %>% filter(Provider.ID=="440039")
#   #filter(Provider.ID %in% c("440039","010001")) %>%  mutate_each(funs(yr3,dummy),3)
# 
# Outcome3yr = Outcome %>% group_by(Provider.ID) %>% arrange(Provider.ID,year) %>%
#   mutate(non.na = sum(!is.na(ABS_MORT_CMP))) %>%  filter(non.na>=3) %>%
#   mutate_each(funs(yr3,dummy),xx.outcome.pos) %>% mutate_each(funs(nafirst),ends_with("yr3"))  %>%
#   select(-ends_with("dummy")) ; Outcome3yr
# 
# # End Testing
# #
# #####
composite.inputs = c("AMI_2"   ,   "HF_1"  ,     "HF_2"    ,   "HF_3" ,      "PN_6"    ,   "SCIP_INF_1" ,"SCIP_INF_3")
process.other <- c("AMI_4","HF_4","PN_4","PN_2","PN_7","AMI_1")
xx.process = c("ABS_PROC_CMP","ABS_PROC_AMI","ABS_PROC_PN","ABS_PROC_HF","ABS_PROC_JHE","PROC_CMP_JHE",composite.inputs,process.other)
load(file=paste(homedir,"../data/files/Process_Measures_Long.Rdata",sep=""))

# Select only the last instance in each year.
Process.Long$date = as.Date(Process.Long$date,format="%Y-%m-%d")
Process.Long$year =  as.numeric(format(Process.Long$date,"%Y"))
Process.Long$month = as.numeric(format(Process.Long$date,"%m"))
Process = Process.Long[,c(xx.ids,xx.process)]; head(Process)
Process = as.data.frame(aggregate(Process, list(Process$Provider.ID,Process$year), FUN=tail, 1)); head(Process)
Process = Process[,-which(names(Process) %in% c("Group.1","Group.2","month"))]

Process.Imputed = Process %>%  arrange(Provider.ID,year) %>% group_by(Provider.ID) %>% 
  mutate_each_(funs(c1),xx.process) %>% 
  mutate_each_(funs(c2),xx.process) %>% 
  mutate_each_(funs(c3),xx.process) %>% 
  mutate_each_(funs(c4),xx.process) %>% 
  mutate_each_(funs(c5),xx.process) %>% 
  mutate_each_(funs(c6),xx.process) %>% 
  mutate_each_(funs(c7),xx.process) %>% 
  mutate_each_(funs(c8),xx.process) %>% 
  mutate_each_(funs(c9),xx.process) %>% 
  mutate_each_(funs(c10),xx.process) %>% arrange(Provider.ID,desc(year)) %>% 
  mutate_each_(funs(c1),xx.process) %>% 
  mutate_each_(funs(c2),xx.process) %>% 
  mutate_each_(funs(c3),xx.process) %>% 
  mutate_each_(funs(c4),xx.process) %>% 
  mutate_each_(funs(c5),xx.process) %>% 
  mutate_each_(funs(c6),xx.process) %>% 
  mutate_each_(funs(c7),xx.process) %>% 
  mutate_each_(funs(c8),xx.process) %>% 
  mutate_each_(funs(c9),xx.process) 

Expanded = expand.grid(Provider.ID = unique(Process$Provider.ID),
                       year = 2000:2016) 
Process.Expanded = merge(Process.Imputed,Expanded,by=c("Provider.ID","year"),all.y=TRUE) %>% 
  arrange(Provider.ID,year) %>% group_by(Provider.ID) %>% 
  mutate_each_(funs(c1),xx.process) %>% 
  mutate_each_(funs(c2),xx.process) %>% 
  mutate_each_(funs(c3),xx.process) %>% 
  mutate_each_(funs(c4),xx.process) %>% 
  mutate_each_(funs(c5),xx.process) %>% 
  mutate_each_(funs(c6),xx.process) %>% 
  mutate_each_(funs(c7),xx.process) %>% 
  mutate_each_(funs(c8),xx.process) %>% 
  mutate_each_(funs(c9),xx.process) %>% 
  mutate_each_(funs(c10),xx.process) %>% arrange(Provider.ID,desc(year)) %>% 
  mutate_each_(funs(c1),xx.process) %>% 
  mutate_each_(funs(c2),xx.process) %>% 
  mutate_each_(funs(c3),xx.process) %>% 
  mutate_each_(funs(c4),xx.process) %>% 
  mutate_each_(funs(c5),xx.process) %>% 
  mutate_each_(funs(c6),xx.process) %>% 
  mutate_each_(funs(c7),xx.process) %>% 
  mutate_each_(funs(c8),xx.process) %>% 
  mutate_each_(funs(c9),xx.process) 

Process.3yr.Full = Process.Expanded %>%  arrange(Provider.ID,year) %>% mutate_each_(funs(yr3,yr1,conc,dummy),xx.process) %>% select(-ends_with("dummy"))
Process.3yr.Full %>% filter(Provider.ID=="440039") %>% select(year,contains("ABS_PROC_CMP"),contains("ABS_PROC_JHE"))
###


xx.HCAHPS = c("ABS_HCAHPS_CMP")
load(file=paste(homedir,"../data/files/HCAHPS_Measures_Long.Rdata",sep=""))

# Select only the last instance in each year.
HCAHPS.Long$date = as.Date(HCAHPS.Long$date,format="%Y-%m-%d")
HCAHPS.Long$year =  as.numeric(format(HCAHPS.Long$date,"%Y"))
HCAHPS.Long$month = as.numeric(format(HCAHPS.Long$date,"%m"))
HCAHPS = HCAHPS.Long[,c(xx.ids,xx.HCAHPS)]; head(HCAHPS)
HCAHPS = as.data.frame(aggregate(HCAHPS, list(HCAHPS$Provider.ID,HCAHPS$year), FUN=tail, 1)); head(HCAHPS)
HCAHPS = HCAHPS[,-which(names(HCAHPS) %in% c("Group.1","Group.2","month"))]


HCAHPS.Imputed = HCAHPS %>%  arrange(Provider.ID,year) %>% group_by(Provider.ID) %>% 
  mutate_each_(funs(c1),xx.HCAHPS) %>% 
  mutate_each_(funs(c2),xx.HCAHPS) %>% 
  mutate_each_(funs(c3),xx.HCAHPS) %>% 
  mutate_each_(funs(c4),xx.HCAHPS) %>% 
  mutate_each_(funs(c5),xx.HCAHPS) %>% 
  mutate_each_(funs(c6),xx.HCAHPS) %>% 
  mutate_each_(funs(c7),xx.HCAHPS) %>% 
  mutate_each_(funs(c8),xx.HCAHPS) %>% 
  mutate_each_(funs(c9),xx.HCAHPS) %>% 
  mutate_each_(funs(c10),xx.HCAHPS) %>% arrange(Provider.ID,desc(year)) %>% 
  mutate_each_(funs(c1),xx.HCAHPS) %>% 
  mutate_each_(funs(c2),xx.HCAHPS) %>% 
  mutate_each_(funs(c3),xx.HCAHPS) %>% 
  mutate_each_(funs(c4),xx.HCAHPS) %>% 
  mutate_each_(funs(c5),xx.HCAHPS) %>% 
  mutate_each_(funs(c6),xx.HCAHPS) %>% 
  mutate_each_(funs(c7),xx.HCAHPS) %>% 
  mutate_each_(funs(c8),xx.HCAHPS) %>% 
  mutate_each_(funs(c9),xx.HCAHPS) 

Expanded = expand.grid(Provider.ID = unique(HCAHPS$Provider.ID),
                       year = 2000:2016) 
HCAHPS.Expanded = merge(HCAHPS.Imputed,Expanded,by=c("Provider.ID","year"),all.y=TRUE) %>% 
  arrange(Provider.ID,year) %>% group_by(Provider.ID) %>% 
  mutate_each_(funs(c1),xx.HCAHPS) %>% 
  mutate_each_(funs(c2),xx.HCAHPS) %>% 
  mutate_each_(funs(c3),xx.HCAHPS) %>% 
  mutate_each_(funs(c4),xx.HCAHPS) %>% 
  mutate_each_(funs(c5),xx.HCAHPS) %>% 
  mutate_each_(funs(c6),xx.HCAHPS) %>% 
  mutate_each_(funs(c7),xx.HCAHPS) %>% 
  mutate_each_(funs(c8),xx.HCAHPS) %>% 
  mutate_each_(funs(c9),xx.HCAHPS) %>% 
  mutate_each_(funs(c10),xx.HCAHPS) %>% arrange(Provider.ID,desc(year)) %>% 
  mutate_each_(funs(c1),xx.HCAHPS) %>% 
  mutate_each_(funs(c2),xx.HCAHPS) %>% 
  mutate_each_(funs(c3),xx.HCAHPS) %>% 
  mutate_each_(funs(c4),xx.HCAHPS) %>% 
  mutate_each_(funs(c5),xx.HCAHPS) %>% 
  mutate_each_(funs(c6),xx.HCAHPS) %>% 
  mutate_each_(funs(c7),xx.HCAHPS) %>% 
  mutate_each_(funs(c8),xx.HCAHPS) %>% 
  mutate_each_(funs(c9),xx.HCAHPS) 

HCAHPS.3yr.Full = HCAHPS.Expanded %>%  arrange(Provider.ID,year) %>% mutate_each_(funs(yr3,yr1,conc,dummy),xx.HCAHPS) %>% select(-ends_with("dummy")) #%>% rename(ABS_HCAHPS_CMP_yr3=yr3,ABS_HCAHPS_CMP_yr1=yr1)
HCAHPS.3yr.Full %>% filter(Provider.ID=="440039") %>% select(year,contains("ABS_"))


AHA.F = AHA3yr.Full %>% filter(Provider.ID!="      " & Provider.ID!="    NA" & Provider.ID!="777777") ; head(AHA.F)
Outcome.F = Outcome.3yr.Full  ; head(Outcome.F)
Process.F = Process.3yr.Full ; head(Process.F)
HCAHPS.F = HCAHPS.3yr.Full ; head(HCAHPS.F)
names(HCAHPS.F)[4] = "ABS_HCAHPS_CMP_yr3"
#eb30.F = eb30.Full 

Final = merge(AHA.F,Outcome.F,by=c("Provider.ID","year"),all.x=TRUE,all.Y=TRUE); dim(Final)
Final = merge(Final,Process.F,by=c("Provider.ID","year"),all.x=TRUE,all.Y=TRUE); dim(Final)
Final = merge(Final,HCAHPS.F,by=c("Provider.ID","year"),all.x=TRUE,all.Y=TRUE); dim(Final)
#Final = merge(Final,eb30.F,by=c("Provider.ID","year"),all.x=TRUE,all.Y=TRUE); dim(Final)

Final$type = with(Final,ifelse(cntrl %in% 31:33,"For Profit",ifelse(cntrl %in% 21:23,"Non-Profit","Government" ))); table(Final$type)

names(Final) = tolower(names(Final))
Final %>% filter(provider.id=="010001") %>% select(year,abs_hcahps_cmp,abs_hcahps_cmp_yr3) 
Final %>% filter(provider.id=="010001") %>% select(year,abs_mort_cmp,abs_mort_cmp_yr3) 

propmiss(Final[which(Final$serv==10 & Final$year>=2002),grep("abs_mort_cmp",names(Final))])
Final %>% filter(provider.id=="010001") %>% select(year,pn_4_yr1) 


propmiss(Final[which(Final$serv==10),grep("abs_mort_cmp",names(Final))])
propmiss(Final[which(Final$serv==10),grep("abs_hcahps_cmp",names(Final))])
propmiss(Final[which(Final$serv==10),grep("abs_proc_cmp",names(Final))])

#write_dta(Final,path="~/Dropbox/Projects/hospital-quality-ambulance/data/aha-data-070516.dta")
#save(Final,file="/Volumes/age5.nber.org/hospital-quality/data/aha-data-070516.Rdata")

# write_dta(Final,path="~/Dropbox/Projects/hospital-quality-ambulance/data/aha-data-112116.dta")
# save(Final,file="/Volumes/age5.nber.org/hospital-quality/data/aha-data-112116.Rdata")

write_dta(Final,path="~/Dropbox/Projects/hospital-quality-ambulance/data/aha-data-120617.dta")
save(Final,file="/Volumes/age5.nber.org/hospital-quality/data/aha-data-120617.Rdata")


# Final.2017 <- Final
# load("/Volumes/age5.nber.org/hospital-quality/data/aha-data-112116.Rdata")
